Pre-processing of the data

ls_preprocessed <- preprocess_rna(path_rnaseq = 'rnaseq.RData', correct_batch = T, correct_gender = T)

Exploring data

Batch effect correction

print(ls_preprocessed$pbatch_bf)

print(ls_preprocessed$pgender_bf)

print(ls_preprocessed$pbatch_af)

print(ls_preprocessed$pgender_af)

DE analysis

DE_res <- DE_analysis(ls_preprocessed, 
           GeneBased=TRUE, 
           pDataBased=FALSE,
           NewCondition=FALSE,
           cond_nm='ENSG00000151012.9',
           reference = 'low', # low, alive
           correct_gender=TRUE,
           extremes_only=TRUE)
## Unlist done
## Labeling done
## Filtering done
## factor levels were dropped which had no samples
## Design done
## factor levels were dropped which had no samples
## Warning: Column `gene`/`ENSEMBL` joining character vector and factor, coercing
## into character vector
## vsd symbols done
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 1526 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## DESeq done
## using 'normal' for LFC shrinkage, the Normal prior from Love et al (2014).
## 
## Note that type='apeglm' and type='ashr' have shown to have less bias than type='normal'.
## See ?lfcShrink for more details on shrinkage type, and the DESeq2 vignette.
## Reference: https://doi.org/10.1093/bioinformatics/bty895
## Warning: Column `gene`/`ENSEMBL` joining character vector and factor, coercing
## into character vector
## res symbols done
## list done

DE results

heatmap_200(DE_res$res_df, DE_res$vsd_mat_sym, DE_res$meta_data, DE_res$pData_rnaseq)

x <- DE_res$res_df %>%
  arrange(desc(abs(log2FoldChange)))
rownames(x) <- make.names(x$symbol, unique = T)
k <- c('ENSG00000250033.1', 'ENSG00000151012.9')
x <- x[-which(x$gene %in%k),]
head(x, 10)
##                  baseMean log2FoldChange     lfcSE       stat       pvalue
## CRP              1.313927       3.876456 0.3567685 -0.1290811 8.972935e-01
## NSG2             5.806703       3.500774 0.3464664  1.9145769 5.554648e-02
## TRPM5           71.511963       3.290770 0.3183576  2.7024817 6.882397e-03
## RP11.443P15.2  373.936082       3.170488 0.3612413  8.8377859 9.763416e-19
## SH2D6           39.161949       3.163835 0.2504549  2.7665551 5.665201e-03
## CALML3           2.700342       3.150567 0.3854255 -0.4306354 6.667335e-01
## AKR1C1        5980.093163       3.150479 0.3673433  8.7648879 1.869627e-18
## PRSS33           4.239273       3.119136 0.3679369 -0.4910499 6.233912e-01
## RP11.749H17.2   37.671876       3.056101 0.3930168  8.0926310 5.838955e-16
## SLC14A2        311.178239       2.926689 0.3715233  8.0581331 7.746842e-16
##                       padj               gene        symbol
## CRP                     NA  ENSG00000132693.8           CRP
## NSG2          3.266530e-01  ENSG00000170091.6          NSG2
## TRPM5         1.110171e-01  ENSG00000070985.9         TRPM5
## RP11.443P15.2 1.183359e-14  ENSG00000171658.4 RP11-443P15.2
## SH2D6         9.989930e-02 ENSG00000152292.12         SH2D6
## CALML3        8.824002e-01  ENSG00000178363.3        CALML3
## AKR1C1        1.699537e-14  ENSG00000187134.8        AKR1C1
## PRSS33        8.635078e-01  ENSG00000103355.8        PRSS33
## RP11.749H17.2 4.246205e-12  ENSG00000267354.1 RP11-749H17.2
## SLC14A2       4.694716e-12  ENSG00000132874.9       SLC14A2
volcano_plot(x, gene=NULL, p_title='SLC7A11')

Pathway enrichment analysis fGSEA

Low SLC7A11 is the reference. When SLC7A11 is high, pathways shown below are up- or down- regulated

fgsea_res <- fgsea_analysis(DE_res)
## Warning in fgsea(pathways = gmtPathways(pthw_path), stats = ranks, nperm = 1000): There are ties in the preranked stats (0.1% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

## Warning in fgsea(pathways = gmtPathways(pthw_path), stats = ranks, nperm = 1000): There are ties in the preranked stats (0.1% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

## Warning in fgsea(pathways = gmtPathways(pthw_path), stats = ranks, nperm = 1000): There are ties in the preranked stats (0.1% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

## Warning in fgsea(pathways = gmtPathways(pthw_path), stats = ranks, nperm = 1000): There are ties in the preranked stats (0.1% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

## Warning in fgsea(pathways = gmtPathways(pthw_path), stats = ranks, nperm = 1000): There are ties in the preranked stats (0.1% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

## Warning in fgsea(pathways = gmtPathways(pthw_path), stats = ranks, nperm = 1000): There are ties in the preranked stats (0.1% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

## Warning in fgsea(pathways = gmtPathways(pthw_path), stats = ranks, nperm = 1000): There are ties in the preranked stats (0.1% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

## Warning in fgsea(pathways = gmtPathways(pthw_path), stats = ranks, nperm = 1000): There are ties in the preranked stats (0.1% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

## Warning in fgsea(pathways = gmtPathways(pthw_path), stats = ranks, nperm = 1000): There are ties in the preranked stats (0.1% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
fgsea_plot(fgsea_res$res_hm, pathways_title='Hallmark', condition_name='SLC7A11 low vs high')

fgsea_plot(fgsea_res$res_c1, pathways_title='C1 positional genes', condition_name='SLC7A11 low vs high')

fgsea_plot(fgsea_res$res_c2, pathways_title='C2 curated genes', condition_name='SLC7A11 low vs high')

fgsea_plot(fgsea_res$res_c3, pathways_title='C3 regulatory target genes', condition_name='SLC7A11 low vs high')

fgsea_plot(fgsea_res$res_c4, pathways_title='C4 cancer', condition_name='SLC7A11 low vs high')

fgsea_plot(fgsea_res$res_c5, pathways_title='C5 GO genes', condition_name='SLC7A11 low vs high')

fgsea_plot(fgsea_res$res_c6, pathways_title='C6 oncogenic', condition_name='SLC7A11 low vs high')

fgsea_plot(fgsea_res$res_c7, pathways_title='C7 immunologic', condition_name='SLC7A11 low vs high')

fgsea_plot(fgsea_res$res_msg, pathways_title='All signatures', condition_name='SLC7A11 low vs high')